## Create the anyDA pair matched design
## For this design as for all of the others we do not see any outcomes or do any outcome analysis.

options(scipen = 8, digits = 4, width = 132)

library(nbpMatching)
library(RItools)
library(optmatch)
library(MASS)
library(parallel)
library(dplyr)
library(designmatch)
library(gurobi) ## This takes some doing to install but is faster than highs package at this writing

source(here::here("Design", "nonbimatchingfunctions.R"))

load(here::here("Data", "wrkdatOwnMap_anyDA_new.rda"), verbose = TRUE)
load(here::here("Design", "dist_mats_anyDA_new.rda"), verbose = TRUE)

wdat0 <- wrkdatOwnMap_new %>%
  filter(!is.na(vm_change) & !is.na(social.capital01) & !is.na(community.resp01)) %>%
  droplevels()
dim(wdat0)
## Make simple objective distance (maybe move this to dist_mats_data_anyData_new.R
## because it overwrites that vmdaDist object)
vmdaDist <- scalar.dist(wdat0$da_prop_vm_20pct_06, scalefactor = 100000) ## the matching software wants integers
dimnames(vmdaDist) <- list(row.names(wdat0), row.names(wdat0))

## What are the kinds of differences we would see within pair?
## to create near and far criteria for the matching
# perceptions_mat <- scalar.dist(wdat0$vm.community.subj, int = FALSE)
# vm_change_mat <- scalar.dist(wdat0$vm_change, int = FALSE)
# area_mat <- scalar.dist(wdat0$community_area_km, int = FALSE)
# quantile(as.vector(perceptions_mat), seq(0, .1, .01))
# quantile(as.vector(vm_change_mat), seq(0, 1, .1))
## quantile(as.vector(csdPopDistMat), seq(0, 1, .1))
# quantile(as.vector(area_mat), seq(0, 1, .1))
# quantile(as.vector(area_mat), seq(.9, 1, .01))
# summary(wdat0$da_popdens_16)
# quantile(wdat0$da_popdens_16, seq(0, 1, .1))

## Match exactly within csd type
canada_exact <- list(covs = as.matrix(wdat0$csd_urban_06))

## Require matches to be within .02 on da vm and .03 on change in vm 2016 - 2006
canada_nearlist <- list(
  covs = as.matrix(wdat0[, c("da_prop_vm_20pct_06", "vm_change")]),
  pairs = c(
    da_prop_vm_20pct_06 = .02,
    vm_change = .03
  )
)

## What is the smallest non zero difference in perceptions --- for the "far" constraint
## i'm using min/10 just in case there was some rounding in the perceptions_mat cal
## min_perc_dist <- min(as.vector(perceptions_mat)[as.vector(perceptions_mat) != 0])
## it is .01 so just putting it here to avoid making more large matrices as we explore designs
min_perc_dist <- .01
canada_farlist <- list(
  covs = as.matrix(wdat0[, "vm.community.subj"]),
  pairs = c(vm.community.subj = min_perc_dist / 10)
)

solverlist <- list(name = "gurobi", approximate = 0, t_max = 10000, trace = 1)

system.time(
  res <- nmatch(
    dist_mat = vmdaDist,
    near = canada_nearlist,
    far = canada_farlist,
    exact = canada_exact,
    total_pairs = 2350,
    subset_weight = NULL,
    solver = solverlist
  )
)
str(res)

## 4042 obs with subset_weight=NULL total_pairs=2600 p=.19
## 3952 obs with subset_weight=NULL total_pairs=2500 p=.58
## 3834 obs with subset_weight=NULL total_pairs=2400 p=.64
## 3772 obs with subset_weight=NULL total_pairs=2350 p=.83
## 3686 obs with subset_weight=NULL total_pairs=2300 p=.99

## Convert nmatch output into a vector for use in analysis
wdat0$id <- row.names(wdat0)
canada_pairs_df <- nmatch_to_df(res, origid = wdat0$id)
nrow(canada_pairs_df)

# The nmatch_to_df function creates a column labeled "pair" which contains
## the matched set indicator
wdat <- inner_join(wdat0, canada_pairs_df, by = "id")
wdat <- droplevels(wdat)
stopifnot(nrow(wdat) == nrow(canada_pairs_df))
stopifnot(length(unique(wdat$pair)) == nrow(wdat) / 2)

## Checking to see that we didn't match any two people with identical perceptions
pairpercdiffs <- tapply(wdat$vm.community.subj, wdat$pair, function(x) {
  return(abs(diff(x)))
})
stopifnot(min(pairpercdiffs) > 0)

## This next looks at the continuous relationship with the variables that we
## really want to hold constant

thecovs <- c("da_prop_vm_20pct_06", "vm_change")
balfmla <- reformulate(thecovs, response = "vm.community.subj")

## Easier to think about "higher perceiver" versus "lower perceiver"
## when it comes to asking whether the matching worked.
wdat <- wdat %>%
  group_by(pair) %>%
  mutate(perc_more = rank(vm.community.subj) - 1) %>%
  ungroup()

## Further simplify the working file
wdat1 <- wdat %>%
  mutate(pairF = factor(pair)) %>%
  select(
    perc_more, da_prop_vm_20pct_06, vm_change, pair, pairF, social.capital01, community.resp01,
    vm.community.norm2, vm.community.subj, vcid, dauid, csd_pop_06, csd_prop_vm_20pct_06,
    da_pop_dens_06, csd_urban_06, csd_pop_dens_06, community_area_km
  )

xb1_ranked0 <- xBalance(perc_more ~ da_prop_vm_20pct_06 + vm_change,
  strata = list(unstr = NULL, pair = ~pair), report = "all", data = wdat1
)
xb1_ranked0$results
xb1_ranked0$overall
nrow(wdat0)
length(res$group_id)
nrow(wdat0) - length(res$group_id)

balfmla_ranked <- update(balfmla, perc_more ~ .)
xb1_ranked <- balanceTest(update(balfmla_ranked, . ~ . + strata(pair)), data = wdat1, p.adjust = "none")
xb1_ranked$overall
xb1_ranked$results[, c("Control", "Treatment", "adj.diff", "std.diff", "p"), ] # , "pair"]

## An alternative for the overall test which should be fairly close to it
library(formula.tools)
library(coin)
coin_fmla <- ~ vm.community.subj | pairF
lhs(coin_fmla) <- rhs(balfmla)
coin_test <- independence_test(coin_fmla, data = wdat1, teststat = "quadratic")
coin_test_perm <- independence_test(coin_fmla,
  data = wdat1, teststat = "quadratic",
  distribution = approximate(nresample = 1000)
)
coin::pvalue(coin_test)
coin::pvalue(coin_test_perm)

coin_fmla_rank <- da_prop_vm_20pct_06 + vm_change ~ perc_more | pairF
coin_test_rank <- independence_test(coin_fmla_rank, data = wdat1, teststat = "quadratic")
coin_test_rank_perm <- independence_test(coin_fmla_rank,
  data = wdat1,
  teststat = "quadratic", distribution = approximate(nresample = 1000)
)
coin::pvalue(coin_test_rank)
coin::pvalue(coin_test_rank_perm)

pairdiffs <- wdat1 %>%
  group_by(pair) %>%
  summarize(
    da_prop_vm_20pct_06 = abs(diff(da_prop_vm_20pct_06)),
    vm_change = abs(diff(vm_change)),
    vm.community.subj = abs(diff(vm.community.subj)),
    csd_pop_06 = abs(diff(csd_pop_06)),
    csd_pop_dens_06 = abs(diff(csd_pop_dens_06)),
    csd_prop_vm_20pct_06 = abs(diff(csd_prop_vm_20pct_06)),
    # da_pop_dens_06 = abs(diff(da_pop_dens_06)),
    community.area.km = abs(diff(community_area_km))
  )

sapply(pairdiffs, summary)

sapply(pairdiffs, function(x) {
  return(quantile(x, sort(unique(c(seq(0, 1, .1), seq(.9, 1, .01)))), na.rm = TRUE))
})

matches_anyDA_new <- wdat1[, c("vcid", "pair")]

save(matches_anyDA_new, file = here::here("Design", "matches_anyDA_new.rda"))


## Match without requiring differences in perceptions as a response to a reviewer

system.time(
  res_nofar <- nmatch(
    dist_mat = vmdaDist,
    near = canada_nearlist,
    # far = canada_farlist,
    exact = canada_exact,
    total_pairs = 2050,
    # subset_weight = 1,
    solver = solverlist
  )
)

## 2466 obs with 1233 pairs with same perceptions with subset_weight=NULL total_pairs=1500 p=.41
## 3296 obs with 1648 pairs with same perceptions with subset_weight=NULL total_pairs=2000 p=.22
## 3276 obs with 1638 pairs with same perceptions with subset_weight=NULL total_pairs=2050 p=.78
## 3626 obs with 1631 pairs with same perceptions with subset_weight=NULL total_pairs=2100 p=.67
## 3476 obs with 1738 pairs with same perceptions with subset_weight=NULL total_pairs=2125 p=.35
## 3320 obs with 1660 pairs with same perceptions with subset_weight=NULL total_pairs=2150 p=.20
## 3578 obs with 1789 pairs with same perceptions with subset_weight=NULL total_pairs=2200 p=.02
## 3660 obs with 1830 pairs with same perceptions with subset_weight=NULL total_pairs=2300 p=.38
## 3640 obs with 1820 pairs with same perceptions with subset_weight=NULL total_pairs=2400 p=.29
## 3904 obs with 1952 pairs with same perceptions with subset_weight=NULL total_pairs=2500 p=.28
## 4000 obs with 2000 pairs with same perceptions with subset_weight=NULL total_pairs=2600 p=.19
## 4064 obs with 2032 pairs with same perceptions with subset_weight=NULL total_pairs=2700 p=.11
## 4258 obs with 2129 pairs with same perceptions with subset_weight=NULL total_pairs=2800 p=.04
## 4374 obs with 2187 pairs with same perceptions with subset_weight=NULL total_pairs=2900 p=.11
## 4520 obs with 2260 pairs with same perceptions with subset_weight=NULL total_pairs=3000 p=.13
## 4514 obs with 2257 pairs with same perceptions with subset_weight=NULL total_pairs=3100 p=.05
str(res_nofar)

## Convert nmatch output into a vector for use in analysis
canada_pairs_df_nofar <- nmatch_to_df(res_nofar, origid = wdat0$id)
nrow(canada_pairs_df_nofar)

# The nmatch_to_df function creates a column labeled "pair" which contains
## the matched set indicator
wdat_nofar <- inner_join(wdat0, canada_pairs_df_nofar, by = "id")
wdat_nofar <- droplevels(wdat_nofar)
stopifnot(nrow(wdat_nofar) == nrow(canada_pairs_df_nofar))
stopifnot(length(unique(wdat_nofar$pair)) == nrow(wdat_nofar) / 2)

## Checking to see the kinds of differences in perceptions we see
pairpercdiffs_nofar <- tapply(wdat_nofar$vm.community.subj, wdat_nofar$pair, function(x) {
  abs(diff(x))
})
summary(pairpercdiffs_nofar)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#   0.000   0.070   0.160   0.244   0.320   4.190
quantile(pairpercdiffs_nofar, seq(0, 1, .1))
#   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100%
# 0.00 0.03 0.06 0.09 0.13 0.18 0.23 0.30 0.40 0.57 4.19
mean(pairpercdiffs_nofar == 0)
# [1] 0.02348

## Easier to think about "higher perceiver" versus "lower perceiver"
## when it comes to asking whether the matching worked.
wdat_nofar <- wdat_nofar %>%
  group_by(pair) %>%
  mutate(perc_more = rank(vm.community.subj) - 1) %>%
  ungroup()

table(wdat_nofar$perc_more, exclude = c())

## Further simplify the working file
wdat_nofar1 <- wdat_nofar %>%
  mutate(pairF = factor(pair)) %>%
  select(
    perc_more, da_prop_vm_20pct_06, vm_change, pair, pairF, social.capital01, community.resp01,
    vm.community.norm2, vm.community.subj, vcid, dauid, csd_pop_06, csd_prop_vm_20pct_06,
    da_pop_dens_06, csd_urban_06, csd_pop_dens_06, community_area_km
  )

## Have to exclude people who perceive the same
xb1_ranked0_nofar <- xBalance(perc_more ~ da_prop_vm_20pct_06 + vm_change,
  strata = list(unstr = NULL, pair = ~pair), report = "all",
  data = droplevels(wdat_nofar1[wdat_nofar$perc_more != .5, ])
)
xb1_ranked0_nofar$results
xb1_ranked0_nofar$overall
nrow(wdat0)
length(res_nofar$group_id)
nrow(wdat0) - length(res_nofar$group_id)

## This next doesn't work without excluding people who perceive the same amount
## xb1_ranked_nofar <- balanceTest(update(balfmla_ranked, . ~ . + strata(pair)), data = wdat_nofar1, p.adjust = "none")
## xb1_ranked_nofar$overall
## xb1_ranked_nofar$results[, c("Control", "Treatment", "adj.diff", "std.diff", "p"), ] # , "pair"]

## An alternative for the overall test which should be fairly close to it
coin_test_nofar <- independence_test(coin_fmla, data = wdat_nofar1, teststat = "quadratic")
coin_test_nofar_perm <- independence_test(coin_fmla,
  data = wdat_nofar1, teststat = "quadratic",
  distribution = approximate(nresample = 1000)
)
coin::pvalue(coin_test_nofar)
coin::pvalue(coin_test_nofar_perm)

coin_fmla_rank <- da_prop_vm_20pct_06 + vm_change ~ perc_more | pairF
coin_test_rank <- independence_test(coin_fmla_rank, data = wdat_nofar1, teststat = "quadratic")
coin_test_rank_perm <- independence_test(coin_fmla_rank,
  data = wdat_nofar1,
  teststat = "quadratic", distribution = approximate(nresample = 1000)
)
coin::pvalue(coin_test_rank)
coin::pvalue(coin_test_rank_perm)

pairdiffs <- wdat_nofar1 %>%
  group_by(pair) %>%
  summarize(
    da_prop_vm_20pct_06 = abs(diff(da_prop_vm_20pct_06)),
    vm_change = abs(diff(vm_change)),
    vm.community.subj = abs(diff(vm.community.subj)),
    csd_pop_06 = abs(diff(csd_pop_06)),
    csd_pop_dens_06 = abs(diff(csd_pop_dens_06)),
    csd_prop_vm_20pct_06 = abs(diff(csd_prop_vm_20pct_06)),
    # da_pop_dens_06 = abs(diff(da_pop_dens_06)),
    community.area.km = abs(diff(community_area_km))
  )

sapply(pairdiffs, summary)

sapply(pairdiffs, function(x) {
  quantile(x, sort(unique(c(seq(0, 1, .1), seq(.9, 1, .01)))), na.rm = TRUE)
})


matches_anyDA_new_nofar <- wdat_nofar1[, c("vcid", "pair")]

save(matches_anyDA_new_nofar, file = here::here("Design", "matches_anyDA_new_nofar.rda"))
